county_fips = read_csv("./data/all-geocodes-v2016.csv", skip = 4) %>%
janitor::clean_names() %>%
rename(county_name = area_name_including_legal_statistical_area_description) %>%
filter(county_code_fips != "000") %>%
mutate(state_county_fips = paste0(state_code_fips, county_code_fips)) %>%
select (state_code_fips, county_code_fips, state_county_fips, county_name)
## Parsed with column specification:
## cols(
## `Summary Level` = col_character(),
## `State Code (FIPS)` = col_character(),
## `County Code (FIPS)` = col_character(),
## `County Subdivision Code (FIPS)` = col_character(),
## `Place Code (FIPS)` = col_character(),
## `Consolidtated City Code (FIPS)` = col_character(),
## `Area Name (including legal/statistical area description)` = col_character()
## )
state_fips = read_csv("./data/state-geocodes-v2016.csv", skip = 5) %>%
janitor::clean_names() %>%
filter(state_fips != "00") %>%
select(state_fips, name) %>%
mutate(state_fips = as.numeric(state_fips)) %>%
rename(state = name)
## Parsed with column specification:
## cols(
## Region = col_double(),
## Division = col_double(),
## `State (FIPS)` = col_character(),
## Name = col_character()
## )
bee_county = read_csv("./data/bee_county/Bee_Colony_Census_Data_by_County 2.csv") %>%
janitor::clean_names() %>%
select(year,
state,
state_ansi,
ag_district,
ag_district_code,
county,
county_ansi,
value,
cv_percent)
## Parsed with column specification:
## cols(
## Year = col_double(),
## Period = col_character(),
## State = col_character(),
## `State ANSI` = col_double(),
## `Ag District` = col_character(),
## `Ag District Code` = col_double(),
## County = col_character(),
## `County ANSI` = col_double(),
## Value = col_character(),
## `CV (%)` = col_character()
## )
bee_county$state_ansi = stringr::str_pad(bee_county$state_ansi,2 , pad = "0")
bee_county$county_ansi = stringr::str_pad(bee_county$county_ansi,3 , pad = "0")
bee_county = bee_county%>%
mutate(state_county_fips = paste0(state_ansi, county_ansi),
value = replace(value, value == "(D)", "NA"),
cv_percent = replace(cv_percent, cv_percent == "(D)", "NA"),
colony_count = value)
#create function to import data
file_name <- list.files(path = "./data/bee_state_2/")
df = read_csv(file = str_c("./data/bee_state_2/", file_name[1])) %>%
mutate(file = file_name[1])
## Parsed with column specification:
## cols(
## `2` = col_double(),
## t = col_character(),
## `Honey: Released February 28, 2002, by the National Agricultural Statistics Service (NASS), Agricultural Statistics Board, U.S. Department of Agriculture.` = col_character(),
## X4 = col_character(),
## X5 = col_character(),
## X6 = col_character(),
## X7 = col_character(),
## X8 = col_character(),
## X9 = col_character()
## )
df
## # A tibble: 51 x 10
## `2` t `Honey: Released … X4 X5 X6 X7 X8 X9 file
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 2 t Honey: Number of… <NA> <NA> <NA> <NA> <NA> <NA> 2002…
## 2 2 t and Value by Stat… <NA> <NA> <NA> <NA> <NA> <NA> 2002…
## 3 2 h <NA> <NA> <NA> <NA> <NA> <NA> <NA> 2002…
## 4 2 h <NA> Honey Yield <NA> <NA> Aver… Value 2002…
## 5 2 h State Prod… per Prod… Stoc… Pric… of 2002…
## 6 2 h <NA> Colo… Colo… <NA> Dec … Poun… Prod… 2002…
## 7 2 h <NA> <NA> <NA> <NA> <NA> <NA> <NA> 2002…
## 8 2 u <NA> 1,000 Poun… 1,00… 1,00… Cents 1,00… 2002…
## 9 2 d AL 16 78 1248 187 59 736 2002…
## 10 2 d AZ 40 59 2360 1322 73 1723 2002…
## # … with 41 more rows
my_read_csv = function(x){
df = read_csv(x, skip = 9, col_names = F)
}
my_read_csv = function(x){
df = read_csv(x, skip = 9, col_names = F)
}
bee_data =
tibble(
file_names = file_name,
path = str_c("./data/bee_state_2/", file_names)
) %>%
mutate(data = map(path, my_read_csv))%>%
unnest()
## Parsed with column specification:
## cols(
## X1 = col_double(),
## X2 = col_character(),
## X3 = col_character(),
## X4 = col_double(),
## X5 = col_double(),
## X6 = col_double(),
## X7 = col_double(),
## X8 = col_double(),
## X9 = col_double()
## )
## Parsed with column specification:
## cols(
## X1 = col_double(),
## X2 = col_character(),
## X3 = col_character(),
## X4 = col_double(),
## X5 = col_double(),
## X6 = col_double(),
## X7 = col_double(),
## X8 = col_double(),
## X9 = col_double()
## )
## Parsed with column specification:
## cols(
## X1 = col_double(),
## X2 = col_character(),
## X3 = col_character(),
## X4 = col_double(),
## X5 = col_double(),
## X6 = col_double(),
## X7 = col_double(),
## X8 = col_double(),
## X9 = col_double()
## )
## Parsed with column specification:
## cols(
## X1 = col_double(),
## X2 = col_character(),
## X3 = col_character(),
## X4 = col_double(),
## X5 = col_double(),
## X6 = col_double(),
## X7 = col_double(),
## X8 = col_double(),
## X9 = col_double()
## )
## Parsed with column specification:
## cols(
## X1 = col_double(),
## X2 = col_character(),
## X3 = col_character(),
## X4 = col_double(),
## X5 = col_double(),
## X6 = col_double(),
## X7 = col_double(),
## X8 = col_double(),
## X9 = col_double()
## )
## Parsed with column specification:
## cols(
## X1 = col_double(),
## X2 = col_character(),
## X3 = col_character(),
## X4 = col_double(),
## X5 = col_double(),
## X6 = col_double(),
## X7 = col_double(),
## X8 = col_double(),
## X9 = col_double()
## )
## Parsed with column specification:
## cols(
## X1 = col_double(),
## X2 = col_character(),
## X3 = col_character(),
## X4 = col_double(),
## X5 = col_double(),
## X6 = col_double(),
## X7 = col_double(),
## X8 = col_double(),
## X9 = col_double()
## )
## Parsed with column specification:
## cols(
## X1 = col_double(),
## X2 = col_character(),
## X3 = col_character(),
## X4 = col_double(),
## X5 = col_double(),
## X6 = col_double(),
## X7 = col_double(),
## X8 = col_double(),
## X9 = col_double()
## )
## Parsed with column specification:
## cols(
## X1 = col_double(),
## X2 = col_character(),
## X3 = col_character(),
## X4 = col_double(),
## X5 = col_double(),
## X6 = col_double(),
## X7 = col_double(),
## X8 = col_double(),
## X9 = col_double()
## )
## Parsed with column specification:
## cols(
## X1 = col_double(),
## X2 = col_character(),
## X3 = col_character(),
## X4 = col_double(),
## X5 = col_double(),
## X6 = col_double(),
## X7 = col_double(),
## X8 = col_double(),
## X9 = col_double()
## )
## Parsed with column specification:
## cols(
## X1 = col_double(),
## X2 = col_character(),
## X3 = col_character(),
## X4 = col_double(),
## X5 = col_double(),
## X6 = col_double(),
## X7 = col_double(),
## X8 = col_double(),
## X9 = col_double()
## )
## Parsed with column specification:
## cols(
## X1 = col_double(),
## X2 = col_character(),
## X3 = col_character(),
## X4 = col_double(),
## X5 = col_double(),
## X6 = col_double(),
## X7 = col_double(),
## X8 = col_double(),
## X9 = col_double()
## )
## Parsed with column specification:
## cols(
## X1 = col_double(),
## X2 = col_character(),
## X3 = col_character(),
## X4 = col_double(),
## X5 = col_double(),
## X6 = col_double(),
## X7 = col_double(),
## X8 = col_double(),
## X9 = col_double()
## )
## Parsed with column specification:
## cols(
## X1 = col_double(),
## X2 = col_character(),
## X3 = col_character(),
## X4 = col_double(),
## X5 = col_double(),
## X6 = col_double(),
## X7 = col_double(),
## X8 = col_double(),
## X9 = col_double()
## )
## Parsed with column specification:
## cols(
## X1 = col_double(),
## X2 = col_character(),
## X3 = col_character(),
## X4 = col_double(),
## X5 = col_double(),
## X6 = col_double(),
## X7 = col_double(),
## X8 = col_double(),
## X9 = col_double()
## )
## Parsed with column specification:
## cols(
## X1 = col_double(),
## X2 = col_character(),
## X3 = col_character(),
## X4 = col_double(),
## X5 = col_double(),
## X6 = col_double(),
## X7 = col_double(),
## X8 = col_double(),
## X9 = col_double()
## )
## Parsed with column specification:
## cols(
## X1 = col_double(),
## X2 = col_character(),
## X3 = col_character(),
## X4 = col_double(),
## X5 = col_double(),
## X6 = col_double(),
## X7 = col_double(),
## X8 = col_double(),
## X9 = col_double()
## )
## Parsed with column specification:
## cols(
## X1 = col_double(),
## X2 = col_character(),
## X3 = col_character(),
## X4 = col_double(),
## X5 = col_double(),
## X6 = col_double(),
## X7 = col_double(),
## X8 = col_double(),
## X9 = col_double()
## )
#clean data set
clean_bee_data =
bee_data %>%
separate(file_names, into = c("year", "remove"), sep = ".c") %>%
select(-remove, -X1, -X2, -path) %>%
rename(state = X3, honey_producing_colonies = X4, yield_per_colony = X5, production = X6, stocks = X7, price_per_pound = X8, production_value = X9) %>%
drop_na(state) %>%
mutate(state = recode(state, AL = "Alabama", AR = "Arkansas", AZ = "Arizona", CA = "California", CO = "Colorado", FL = "Florida", GA = "Georgia", HI = "Hawaii", IA = "Iowa", IL = "Illinois", ID = "Idaho", IN = "Indiana", KS = "Kansas", KY = "Kentucky", LA = "Louisiana", ME = "Maine", MD = "Maryland", MI = "Michigan", MN = "Minnesota", MO = "Missouri", MT = "Montana", MS = "Mississippi", NC = "North Carolina", ND = "North Dakota", NE = "Nebraska", NJ = "New Jersey", NM = "New Mexico", NV = "Nevada", NY = "New York", OH = "Ohio", OK = "Oklahoma", OR = "Oregon", PA = "Pennsylvania", SC = "South Carolina", SD = "South Dakota", TN = "Tennessee", TX = "Texas", UT = "Utah", VA = "Virginia", VT = "Vermont", WA = "Washington", WV = "West Virginia", WI = "Wisconsin", WY = "Wyoming" ))
clean_bee_data %>%
ggplot(aes(x = year, y = honey_producing_colonies, color = state)) +
geom_point()

pest_2002 = read_excel("./data/pesticides_csv/EPest.county.estimates.2002.xlsx") %>%
janitor::clean_names() %>%
mutate(
state_fips = state_fips_code,
county_fips = county_fips_code,
state_county_fips = paste0(state_fips, county_fips),
epest_low_kg = round(epest_low_kg),
epest_high_kg = round(epest_high_kg)) %>%
select(-state_fips_code, -county_fips_code)
pest_2002 %>%
group_by(year, state_county_fips, compound) %>%
ggplot(aes(x = compound, y = epest_high_kg)) +
geom_col()
unique(pull(pest_2002, compound))
state_bee_fips = full_join(clean_bee_data, state_fips, by = "state")
top_pesticides = read_csv("./data/top_pesticides.csv") %>%
mutate(state_fips = as.numeric(state_fips))
## Parsed with column specification:
## cols(
## X1 = col_double(),
## compound = col_character(),
## year = col_double(),
## epest_low_kg = col_double(),
## epest_high_kg = col_double(),
## state_fips = col_character(),
## county_fips = col_character(),
## state_county_fips = col_character()
## )
state_pest_data =
top_pesticides %>%
group_by(state_fips, year, compound) %>%
summarize(low = sum(epest_low_kg), high = sum(epest_high_kg))
state_bee_yrfips =
state_bee_fips %>%
mutate(yearfips = paste0(year, state_fips))
state_pest_yrfips =
state_pest_data %>%
mutate(yearfips = paste0(year, state_fips))
merged_state_data = full_join(state_bee_yrfips, state_pest_yrfips, by = "yearfips") %>%
select(year.x, state, state_fips.x, honey_producing_colonies, yield_per_colony, production, compound, low, high) %>%
drop_na(year.x, state, compound, high)
county_bee_yrfips =
bee_county %>%
mutate(yearfips = paste0(year, state_county_fips))
county_pest_yrfips =
top_pesticides %>%
mutate(state_county_fips = as.numeric(state_county_fips) ,
yearfips = paste0(year, state_county_fips))
merged_county_data = full_join(county_bee_yrfips, county_pest_yrfips, by = "yearfips") %>%
select(year.x, state, county, county_fips, colony_count, compound, epest_low_kg, epest_high_kg) %>%
drop_na(year.x, state, county, colony_count, compound, epest_high_kg)
#correlation of state level means across all years
corr_state = read_csv("./data/top_pesticides.csv") %>%
group_by(compound, state_fips) %>%
summarise(mean_pest_high = mean(epest_high_kg, na.rm = TRUE)) %>%
pivot_wider(
names_from = "compound",
values_from = "mean_pest_high",
) %>%
select(-state_fips)
## Parsed with column specification:
## cols(
## X1 = col_double(),
## compound = col_character(),
## year = col_double(),
## epest_low_kg = col_double(),
## epest_high_kg = col_double(),
## state_fips = col_character(),
## county_fips = col_character(),
## state_county_fips = col_character()
## )
corr_state %>%
view()
matrix_state_1 = cor(corr_state, use = "everything", method = c("pearson"))
matrix_state_1
## CHLOROTHALONIL CHLORPYRIFOS CLOTHIANIDIN FIPRONIL
## CHLOROTHALONIL 1.0000000 0.4802217 0.0571605 NA
## CHLORPYRIFOS 0.4802217 1.0000000 0.5316680 NA
## CLOTHIANIDIN 0.0571605 0.5316680 1.0000000 NA
## FIPRONIL NA NA NA 1
## IMIDACLOPRID 0.4953138 0.8741880 0.3934778 NA
## THIACLOPRID NA NA NA NA
## IMIDACLOPRID THIACLOPRID
## CHLOROTHALONIL 0.4953138 NA
## CHLORPYRIFOS 0.8741880 NA
## CLOTHIANIDIN 0.3934778 NA
## FIPRONIL NA NA
## IMIDACLOPRID 1.0000000 NA
## THIACLOPRID NA 1
matrix_state_2 = cor(corr_state, use = "complete.obs", method = c("pearson"))
matrix_state_2
## CHLOROTHALONIL CHLORPYRIFOS CLOTHIANIDIN FIPRONIL
## CHLOROTHALONIL 1.0000000 0.70246745 0.3117681 -0.122623772
## CHLORPYRIFOS 0.7024675 1.00000000 0.5756553 0.001349550
## CLOTHIANIDIN 0.3117681 0.57565529 1.0000000 0.506889870
## FIPRONIL -0.1226238 0.00134955 0.5068899 1.000000000
## IMIDACLOPRID 0.6541162 0.97497272 0.5498456 -0.003886203
## THIACLOPRID 0.1766534 0.12367034 -0.1679516 -0.056247127
## IMIDACLOPRID THIACLOPRID
## CHLOROTHALONIL 0.654116214 0.176653447
## CHLORPYRIFOS 0.974972715 0.123670345
## CLOTHIANIDIN 0.549845616 -0.167951571
## FIPRONIL -0.003886203 -0.056247127
## IMIDACLOPRID 1.000000000 -0.002073838
## THIACLOPRID -0.002073838 1.000000000
#correlation of country level mean by year
corr_country = read_csv("./data/top_pesticides.csv") %>%
group_by(compound, year) %>%
summarise(mean_pest_high = mean(epest_high_kg, na.rm = TRUE)) %>%
pivot_wider(
names_from = "compound",
values_from = "mean_pest_high",
) %>%
ungroup() %>%
select(-year)
## Parsed with column specification:
## cols(
## X1 = col_double(),
## compound = col_character(),
## year = col_double(),
## epest_low_kg = col_double(),
## epest_high_kg = col_double(),
## state_fips = col_character(),
## county_fips = col_character(),
## state_county_fips = col_character()
## )
matrix_country_1 = cor(corr_country, use = "everything", method = c("pearson"))
matrix_country_1
## CHLOROTHALONIL CHLORPYRIFOS CLOTHIANIDIN FIPRONIL
## CHLOROTHALONIL 1.00000000 -0.30707012 0.05973758 -0.59954253
## CHLORPYRIFOS -0.30707012 1.00000000 0.19330789 -0.02526745
## CLOTHIANIDIN 0.05973758 0.19330789 1.00000000 -0.45653473
## FIPRONIL -0.59954253 -0.02526745 -0.45653473 1.00000000
## IMIDACLOPRID 0.22768878 -0.14992739 0.87269447 -0.58062939
## THIACLOPRID -0.01268331 -0.28960638 0.31040782 -0.29736183
## IMIDACLOPRID THIACLOPRID
## CHLOROTHALONIL 0.2276888 -0.01268331
## CHLORPYRIFOS -0.1499274 -0.28960638
## CLOTHIANIDIN 0.8726945 0.31040782
## FIPRONIL -0.5806294 -0.29736183
## IMIDACLOPRID 1.0000000 0.40945350
## THIACLOPRID 0.4094535 1.00000000
matrix_country_2 = cor(corr_country, use = "complete.obs", method = c("pearson"))
matrix_country_2
## CHLOROTHALONIL CHLORPYRIFOS CLOTHIANIDIN FIPRONIL
## CHLOROTHALONIL 1.00000000 -0.30707012 0.05973758 -0.59954253
## CHLORPYRIFOS -0.30707012 1.00000000 0.19330789 -0.02526745
## CLOTHIANIDIN 0.05973758 0.19330789 1.00000000 -0.45653473
## FIPRONIL -0.59954253 -0.02526745 -0.45653473 1.00000000
## IMIDACLOPRID 0.22768878 -0.14992739 0.87269447 -0.58062939
## THIACLOPRID -0.01268331 -0.28960638 0.31040782 -0.29736183
## IMIDACLOPRID THIACLOPRID
## CHLOROTHALONIL 0.2276888 -0.01268331
## CHLORPYRIFOS -0.1499274 -0.28960638
## CLOTHIANIDIN 0.8726945 0.31040782
## FIPRONIL -0.5806294 -0.29736183
## IMIDACLOPRID 1.0000000 0.40945350
## THIACLOPRID 0.4094535 1.00000000
#visualization
corrplot(matrix_state_2, order = "hclust", type = "full")

corrplot(matrix_country_2, order = "hclust", type = "full")

pest_country = read_csv("./data/top_pesticides.csv") %>%
group_by(compound, year) %>%
summarise(mean_pest_high = mean(epest_high_kg, na.rm = TRUE)) %>%
ggplot(aes(x = year, y = mean_pest_high, color = compound)) +
geom_line()
## Parsed with column specification:
## cols(
## X1 = col_double(),
## compound = col_character(),
## year = col_double(),
## epest_low_kg = col_double(),
## epest_high_kg = col_double(),
## state_fips = col_character(),
## county_fips = col_character(),
## state_county_fips = col_character()
## )
pest_country

pest_state = read_csv("./data/top_pesticides.csv") %>%
group_by(year, compound, state_fips) %>%
summarise(mean_pest_high = mean(epest_high_kg, na.rm = TRUE)) %>%
ggplot(aes(x = year, y = mean_pest_high, color = compound)) +
geom_line() +
facet_grid(~state_fips)
## Parsed with column specification:
## cols(
## X1 = col_double(),
## compound = col_character(),
## year = col_double(),
## epest_low_kg = col_double(),
## epest_high_kg = col_double(),
## state_fips = col_character(),
## county_fips = col_character(),
## state_county_fips = col_character()
## )
pest_state
